#load library
library(grid)
library(ggtern)
library(quanteda)
library(qlcMatrix)
library(stringdist)
library(vars)
library(AER)
library(readxl)
library(stargazer)
library(scales)
library(quantmod)
library(urca)
library(sandwich)
library(lmtest)
library(forecast)
library(dyn)
library(rtweet) ; library(tidyverse) ; library(tidytext)
library(twitteR)
library(igraph)
library(stringr)
library(tidygraph)
library(ggraph)
library(stringi)
library(tidyr)
library(xts)
library(DescTools)
library(ggplot2)  # For graphics
library(dplyr)  # For aggregating
library(stringr)
library(wordcloud)
library(gdata)
library(dynlm)

# Avoid scientific notation on plot axes and printed numbers.
options(scipen = 999)

# Load the three input tables from the working directory.
vio_tweets <- read.csv("hk_vio_tweets.csv")
vio_news <- read.csv("hk_vio_news.csv")
sentiment <- read.csv("sentiment.csv")

# Convert the date columns from text to Date.
library(lubridate)
vio_tweets$created_at <- as.Date(vio_tweets$created_at)
sentiment$created_at <- as.Date(sentiment$created_at)
# The news dates are parsed as day-month-year.
vio_news$date <- dmy(vio_news$date)

# Plot tweet frequency per day, with one line per sentiment label.
ts_plot(dplyr::group_by(sentiment, label), "days") +
  labs(
    x = NULL,
    y = NULL,
    title = "Sentiment Towards Anti-Extradition Protests in 2019"
  ) +
  theme_minimal()

### Clean data frames ----

# library() stops with an error if xts is missing, whereas require() only
# returns FALSE and lets the script continue.
library(xts)

# Sentiment: number of tweets per day and sentiment label.
# as_tibble() keeps the tibble print method so print(n = Inf) shows all rows.
sentiment_count <- sentiment %>%
  as_tibble() %>%
  count(created_at, label)
sentiment_count %>% print(n = Inf)

pos <- subset(sentiment_count, label == "Positive")
Negative <- subset(sentiment_count, label == "Negative")

# Analysis window: one row per calendar day, both end dates included.
time.min <- as.Date("2019-06-01")
time.max <- as.Date("2020-01-31")
all.dates <- seq(time.min, time.max, by = "day")
all.dates.frame <- data.frame(created_at = all.dates)

# Left-join the counts onto the calendar (all.x = TRUE) so the result has
# exactly one row per day in the window; days without tweets get a count of 0.
pos <- merge(all.dates.frame, pos, by = "created_at", all.x = TRUE)
pos$label <- NULL
pos$n[is.na(pos$n)] <- 0

# NOTE(review): the original used a full outer join and then deleted row 246
# by position. That row is presumably a date outside the window (or an NA
# date); the left join drops such rows without relying on a row index.
# Confirm against the raw data.
Negative <- merge(all.dates.frame, Negative, by = "created_at", all.x = TRUE)
Negative$label <- NULL
Negative$n[is.na(Negative$n)] <- 0

# Violence: daily number of news items and tweets about violent tactics.

vio_news$media <- NULL

# as_tibble() keeps the tibble print method so print(n = Inf) shows all rows.
news_count <- vio_news %>%
  as_tibble() %>%
  count(date)
news_count %>% print(n = Inf)

names(news_count)[names(news_count) == "date"] <- "created_at"

# Left-join onto the calendar so the table has exactly one row per day in the
# analysis window; days without news get a count of 0.
news_count <- merge(
  all.dates.frame, news_count,
  by = "created_at", all.x = TRUE
)
news_count$n[is.na(news_count$n)] <- 0

tweet_count <- vio_tweets %>%
  as_tibble() %>%
  count(created_at)
tweet_count %>% print(n = Inf)

# NOTE(review): the original deleted the first nine rows by position before a
# full outer join. Those rows are presumably dates before the window starts;
# the left join drops out-of-window dates without relying on row indices.
# Confirm against the raw data.
tweet_count <- merge(
  all.dates.frame, tweet_count,
  by = "created_at", all.x = TRUE
)
tweet_count$n[is.na(tweet_count$n)] <- 0

# The two tables are added element-wise, so they must cover the same days in
# the same order.
stopifnot(identical(tweet_count$created_at, news_count$created_at))

violence <- data.frame(
  created_at = tweet_count$created_at,
  n = tweet_count$n + news_count$n
)

ggplot(violence, aes(x = created_at, y = n)) +
  geom_line(color = "#69b3a2") +
  labs(x = "Date", y = "Frequency") +
  ggtitle("Tweets & News about Violent Tactics by Protestors")

# Total count: number of tweets per day across all sentiment labels.

# as_tibble() keeps the tibble print method so print(n = Inf) shows all rows.
total_count <- sentiment %>%
  as_tibble() %>%
  count(created_at)
total_count %>% print(n = Inf)

# NOTE(review): the original deleted row 245 by position before a full outer
# join. That row is presumably a date outside the analysis window (or an NA
# date); the left join below keeps exactly one row per day in the window
# without relying on a row index. Confirm against the raw data.
total_count <- merge(
  all.dates.frame, total_count,
  by = "created_at", all.x = TRUE
)
total_count$n[is.na(total_count$n)] <- 0

ggplot(total_count, aes(x = created_at, y = n)) +
  geom_line(color = "#7570B3") +
  labs(x = "Date", y = "Frequency") +
  ggtitle("Total Count of Tweets")

### Convert the daily tables to time series ----

# The date column is removed from each table before conversion. ts() is called
# without start/frequency arguments, so the series carry a plain integer time
# index instead of calendar dates.

# Positive sentiment: `pos` stays a data frame, `pos_ts` is the series.
pos <- pos[, names(pos) != "created_at", drop = FALSE]
pos_ts <- ts(pos)

# Negative sentiment and violence are replaced by their series in place.
Negative <- ts(Negative[, names(Negative) != "created_at", drop = FALSE])
violence <- ts(violence[, names(violence) != "created_at", drop = FALSE])

# Total tweets: `total_count` stays a data frame, `total_ts` is the series.
total_count <- total_count[, names(total_count) != "created_at", drop = FALSE]
total_ts <- ts(total_count)

### Regression: negative sentiment on reported violence ----

library(ggfortify)

Violence <- violence

# Joint series for model 1. The column names are set explicitly because every
# plot and model below refers to the columns by name.
m1 <- cbind(Negative, Violence)
colnames(m1) <- c("Negative", "Violence")

# One panel per series.
autoplot(m1, facets = TRUE) +
  xlab("Time") + ylab("Frequency") +
  theme_bw() +
  theme(text = element_text(size = 25))

# Both series in a single panel. The original printed `m1_ts` without ever
# defining it, which stopped the script with an "object not found" error.
m1_ts <- autoplot(m1[, c("Negative", "Violence")]) +
  ylab("Frequency") + xlab("Time")
m1_ts

# Scatter of negative sentiment against reported violence. The aesthetics map
# the columns of m1; the original mapped the global objects `violence` and
# `pos_ts`, which put positive sentiment on an axis labelled negative.
m1 %>%
  as.data.frame() %>%
  ggplot(aes(x = Violence, y = Negative)) +
  ylab("Negative Sentiment") +
  xlab("Reported Violence") +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() +
  theme(text = element_text(size = 25))

# Linear model on the columns of m1, fitted once and then printed. The
# original formula used `neg`, which is not defined anywhere in the script.
fit.m1 <- tslm(Negative ~ Violence, data = m1)
fit.m1
summary(fit.m1)

checkresiduals(fit.m1)

# Set any missing values in the joint series to 0 before fitting.
m1[is.na(m1)] <- 0

lagselect_m1 <- VARselect(m1, lag.max = 10, type = "const")

# NOTE(review): the original comment here read "use 1 lag", but the models
# below use lags 5 and 9. Confirm the intended lag order against this output.
lagselect_m1$selection

m1_lag_5 <- VAR(m1, p = 5, type = "const", season = NULL, exogen = NULL)

summary(m1_lag_5)

# Lagged regressions on the columns of m1. The original formulas referred to
# `neg`, which is not defined anywhere in the script.
m1_dynlm_5 <- dynlm(Negative ~ L(Negative, 5) + L(Violence, 5), data = m1)

coeftest(m1_dynlm_5, vcov. = sandwich)

m1_dynlm_9 <- dynlm(Negative ~ L(Negative, 9) + L(Violence, 9), data = m1)

coeftest(m1_dynlm_9, vcov. = sandwich)

summary(m1_dynlm_5)

summary(m1_dynlm_9)

# `cause` has to match a column name of the series passed to VAR(); the column
# is "Violence" (capitalised), not "violence".
granger_m1 <- causality(m1_lag_5, cause = "Violence")

granger_m1

### Box-Cox transform (lambda estimated per series) ----

# Take both inputs from the columns of m1. The original used `neg`, which is
# not defined anywhere in the script.
neg_series <- m1[, "Negative"]
vio_series <- m1[, "Violence"]

# NOTE(review): days without tweets were filled with 0 earlier in the script;
# confirm the transform is well-defined for the estimated lambda values.
(lambda_neg <- BoxCox.lambda(neg_series))

(lambda_vio <- BoxCox.lambda(vio_series))

neg_log <- BoxCox(neg_series, lambda_neg)

vio_log <- BoxCox(vio_series, lambda_vio)

m1_log <- cbind(neg_log, vio_log)

# NOTE(review): this regresses violence on negative sentiment, the reverse of
# the untransformed model (negative sentiment on violence). Confirm that the
# direction is intended.
fit.m1_log <- tslm(vio_log ~ neg_log, data = m1_log)
summary(fit.m1_log)

checkresiduals(fit.m1_log)



### Model 2: total tweet volume and reported violence ----

Tweets <- total_ts

Violence <- violence

# Joint series for model 2. The column names are set explicitly because the
# plots below select the columns by name.
m2 <- cbind(Tweets, Violence)
colnames(m2) <- c("Tweets", "Violence")

# One panel per series.
autoplot(m2, facets = TRUE) +
  xlab("Time") + ylab("Frequency") +
  theme_bw() +
  theme(text = element_text(size = 25))

# Both series in a single panel. The original selected the columns "total_ts"
# and "violence", which do not exist in m2.
m2_ts <- autoplot(m2[, c("Tweets", "Violence")]) +
  ylab("Frequency") + xlab("Date")

# Faceted plot with descriptive panel labels. The original called stl() on the
# two-column series, but stl() accepts only a univariate seasonal series, and
# then tried to overwrite the facet labels inside the built plot object.
# Renaming the columns of a copy gives the same labels directly.
m2_labelled <- m2
colnames(m2_labelled) <- c("Total Tweets", "Reported Violence")

p <- autoplot(m2_labelled, facets = TRUE) +
  xlab("Time") + ylab("Frequency") +
  theme_bw()

g <- ggplot_build(p)

# Start a fresh page so the grob is not drawn on top of the previous plot.
grid.newpage()
grid.draw(ggplot_gtable(g))

m2_ts + ggtitle("Violence & Total Tweets")

# Scatter of total tweets against reported violence. The aesthetics map the
# columns of m2 ("Tweets", "Violence"); the original named `total_ts` and
# `violence`, which are not columns of m2 and only resolved to global objects.
m2 %>%
  as.data.frame() %>%
  ggplot(aes(x = Violence, y = Tweets)) +
  ylab("Total Tweets") +
  xlab("Violence") +
  geom_point() +
  ggtitle("Violence vs Total Tweets") +
  geom_smooth(method = "lm", se = FALSE)

# Linear model fitted on the columns of m2 for the same reason.
fit.m2 <- tslm(Tweets ~ Violence, data = m2)
summary(fit.m2)

checkresiduals(fit.m2)

# Set any missing values in the joint series to 0 before fitting. The original
# indexed m2 with is.na(m1), i.e. with the missing-value mask of the other
# model's series.
m2[is.na(m2)] <- 0

lagselect_m2 <- VARselect(m2, lag.max = 10, type = "const")

# NOTE(review): the original comment here read "use 1 lag", but the models
# below use lag 10. Confirm the intended lag order against this output.
lagselect_m2$selection

m2_lag_10 <- VAR(m2, p = 10, type = "const", season = NULL, exogen = NULL)

summary(m2_lag_10)

# Lagged regression on the columns of m2 ("Tweets", "Violence"), so the model
# uses the same data as the VAR above.
m2_dynlm_10 <- dynlm(Tweets ~ L(Tweets, 10) + L(Violence, 10), data = m2)

coeftest(m2_dynlm_10, vcov. = sandwich)


summary(m2_dynlm_10)

# `cause` has to match a column name of the series passed to VAR(); the column
# is "Violence" (capitalised), not "violence".
granger_m2 <- causality(m2_lag_10, cause = "Violence")

granger_m2
